package snippets.cse524.activeDates;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;

import org.apache.commons.lang3.StringUtils;

import snippets.utils.Utils;

/**
 * Command-line filter that rewrites {@code source:entity} mapping lines,
 * replacing each entity with the value stored for it in a canonical-names
 * map and dropping lines whose entity is not a key of that map.
 *
 * <p>
 * Progress messages go to standard error; the filtered mappings go to
 * standard output.
 */
public class FilterEntitiesMR {

	/** Separator between the source and the entity on every mapping line. */
	private static final String FIELD_SEPARATOR = ":";

	/**
	 * Reads the canonical names, then streams the mappings file and prints
	 * {@code source:canonicalName} for every line whose entity is known.
	 *
	 * @param args
	 *            {@code args[0]} is the path of the canonical-names file
	 *            handed to {@code Utils.readCanonicalNames}; {@code args[1]}
	 *            is the path of the mappings file, one {@code source:entity}
	 *            pair per line
	 * @throws FileNotFoundException
	 *             if either file cannot be opened
	 * @throws IllegalArgumentException
	 *             if fewer than two arguments are supplied
	 */
	public static void main(String[] args) throws FileNotFoundException {
		if (args == null || args.length < 2) {
			throw new IllegalArgumentException(
					"Usage: FilterEntitiesMR <canonical-names-file> <news-mappings-file>");
		}

		System.err.println("Reading canonical names ...");
		Map<String, String> canonicalNames = loadCanonicalNames(args[0]);

		System.err.println("Reading news mappings ...");
		// Scanner.close() also closes the underlying file and throws nothing
		// checked, so try-with-resources is enough here.
		try (Scanner in = new Scanner(new File(args[1]))) {
			while (in.hasNextLine()) {
				String line = in.nextLine();

				String[] parts = line.split(FIELD_SEPARATOR);

				// Skip lines that do not split into exactly two parts.
				if (parts.length != 2) {
					continue;
				}
				String source = parts[0].trim();
				// NOTE(review): toLowerCase() uses the default locale. It is
				// left as is because the key normalization done by
				// Utils.readCanonicalNames is not visible from here; confirm
				// both sides agree before switching to Locale.ROOT.
				String entity = parts[1].trim().toLowerCase();

				if (canonicalNames.containsKey(entity)) {
					System.out.println(StringUtils.join(new String[] { source,
							canonicalNames.get(entity) }, FIELD_SEPARATOR));
				}
			}

			// Scanner swallows read errors and simply reports "no more
			// lines"; surface them so truncated output is not mistaken for
			// a complete run.
			if (in.ioException() != null) {
				System.err.println("Warning: stopped reading " + args[1]
						+ " early: " + in.ioException());
			}
		}
	}

	/**
	 * Loads the canonical-names map from the given file and always closes the
	 * file afterwards.
	 *
	 * @param path
	 *            path of the canonical-names file
	 * @return the map produced by {@code Utils.readCanonicalNames}
	 * @throws FileNotFoundException
	 *             if the file cannot be opened
	 */
	private static Map<String, String> loadCanonicalNames(String path)
			throws FileNotFoundException {
		FileInputStream namesIn = new FileInputStream(new File(path));
		try {
			return Utils.readCanonicalNames(namesIn);
		} finally {
			try {
				namesIn.close();
			} catch (java.io.IOException ignored) {
				// The stream was only read from, so a failed close loses no
				// data and must not mask the result or an earlier exception.
			}
		}
	}
}
